1 Introduction

The new peak calling round applied on the previous notebook significantly increased the number of the features identified in our dataset. Therefore, we must need to repeat the standard downstream analysis, including data normalization, dimensionality reduction analysis and batch correction to account for this change in the number of features detected.

2 Pre-processing

2.1 Load packages

library(Seurat)
library(SeuratWrappers)
library(Signac)
library(harmony)
library(tidyverse)
library(plotly)
library(plyr)

set.seed(333)

2.2 Parameters

cell_type = "GCBC"

color_palette <-  c("#1CFFCE", "#90AD1C", "#C075A6", "#85660D", 
                    "#5A5156", "#AA0DFE", "#F8A19F", "#F7E1A0", 
                    "#1C8356", "#FEAF16", "#822E1C", "#C4451C", 
                    "#1CBE4F", "#325A9B", "#F6222E", "#FE00FA", 
                    "#FBE426", "#16FF32", "black", "#3283FE",
                    "#B00068", "#DEA0FD", "#B10DA1", "#E4E1E3",
                    "#90AD1C", "#FE00FA", "#85660D", "#3B00FB",
                    "#822E1C", "coral2", "#1CFFCE", "#1CBE4F",
                    "#3283FE", "#FBE426", "#F7E1A0", "#325A9B",   
                    "#2ED9FF", "#B5EFB5", "#5A5156", "#DEA0FD",
                    "#FEAF16", "#683B79", "#B10DA1", "#1C7F93", 
                    "#F8A19F", "dark orange", "#FEAF16", "#FBE426",  
                    "Brown")

# Paths
path_to_obj <- paste0(
  here::here("scATAC-seq/results/R_objects/level_4/"),
  cell_type,
  "/",
  cell_type,
  "_annotated_peak_calling_level_4.rds",
  sep = ""
)

path_to_save <- paste0(
  here::here("scATAC-seq/results/R_objects/level_4/"),
  cell_type,
  "/",
  cell_type,
  "_integration_peak_calling_level_4.rds",
  sep = ""
)

2.3 Load data

seurat <- readRDS(path_to_obj)
seurat
## An object of class Seurat 
## 142257 features across 21447 samples within 1 assay 
## Active assay: peaks_redefined (142257 features, 0 variable features)
##  1 dimensional reduction calculated: umap

3 Visualize UMAP without batch effect correction

# Normalization, dimensionality reduction 
seurat <- seurat %>%
  RunTFIDF() %>% 
  FindTopFeatures(min.cutoff = 10) %>%
  RunSVD() %>%
  RunUMAP(reduction = "lsi", dims = 2:30)

DepthCor(seurat)

DimPlot(seurat, 
        cols = color_palette,
        group.by = "level_5", 
        pt.size = 0.1)

# Visualize UMAP's confounders
confounders <- c("library_name", "sex", "age_group", "hospital", "assay")
umaps_before_integration <- purrr::map(confounders, function(x) {
  p <- DimPlot(seurat, group.by = x, pt.size = 0.1)
  p
})
names(umaps_before_integration) <- confounders
print("UMAP colored by GEM:")
## [1] "UMAP colored by GEM:"
umaps_before_integration$library_name + NoLegend()

print("UMAP colored by sex, age group, cell hashing status, sampling center and assay:")
## [1] "UMAP colored by sex, age group, cell hashing status, sampling center and assay:"
umaps_before_integration[2:length(umaps_before_integration)]
## $sex

## 
## $age_group

## 
## $hospital

## 
## $assay

4 Run and visualize Harmony’s integration

seurat <- RunHarmony(
  object = seurat,
  dims = 2:30,
  group.by.vars = 'gem_id',
  reduction = 'lsi',
  assay.use = 'peaks_redefined',
  project.dim = FALSE,
  max.iter.harmony = 20
)
    
# Non-linear dimension reduction and clustering
seurat <- RunUMAP(seurat, dims = 2:30, reduction = 'harmony')
p1 <- DimPlot(seurat, 
        cols = color_palette,
        group.by = "level_5", 
        pt.size = 0.1)
p2 <- DimPlot(seurat, 
        cols = c("darkblue","darkblue","darkblue",
                   "darkblue","lightblue","lightblue",
                   "red","red","red","gray","gray"),
       group.by  = "level_5", 
        pt.size = 0.1)
# Visualize UMAP's confounders
umaps_after_integration <- purrr::map(confounders, function(x) {
  p <- DimPlot(seurat, group.by = x, pt.size = 0.1)
  p
})
names(umaps_after_integration) <- confounders
print("UMAP colored by GEM:")
## [1] "UMAP colored by GEM:"
umaps_after_integration$library_name + NoLegend()

print("UMAP colored by sex, age group, cell hashing status, sampling center and assay:")
## [1] "UMAP colored by sex, age group, cell hashing status, sampling center and assay:"
umaps_after_integration[2:length(umaps_before_integration)]
## $sex

## 
## $age_group

## 
## $hospital

## 
## $assay

#
# Re-run UMAPs that you have accurate calculations for all UMAP(s)
seurat <- RunUMAP(reduction = 'harmony', 
                            seurat,
                            dims = 2:30,  
                            n.components = 3L)

# Extract tSNE information from Seurat Object
umap_1 <- seurat[["umap"]]@cell.embeddings[,1]
umap_2 <- seurat[["umap"]]@cell.embeddings[,2]
umap_3 <- seurat[["umap"]]@cell.embeddings[,3]

# Prepare a dataframe for cell plotting
plot.data <- FetchData(object = seurat, vars = c("UMAP_1", "UMAP_2", "UMAP_3", "level_5"))

# Make a column of row name identities (these will be your cell/barcode names)
plot.data$label <- paste(rownames(plot.data))

fig <- plot_ly(data = plot.data, 
        x = ~UMAP_1, y = ~UMAP_2, z = ~UMAP_3, 
        color = ~level_5, 
        colors = c("darkblue","darkblue","darkblue",
                   "darkblue","lightblue","lightblue",
                   "red","red","red","gray","gray"),
        type = "scatter3d", 
        mode = "markers", 
        marker = list(size = 2, width=1), # controls size of points
        text=~label, #This is that extra column we made earlier for which we will use for cell ID
        hoverinfo="text") #When you visualize your plotly object, hovering your mouse pointer over a point shows cell names

fig

5 Save

# Save integrated Seurat object
saveRDS(seurat, path_to_save)

6 Session Information

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS/LAPACK: /Users/pauli/opt/anaconda3/envs/Tonsil_atlas/lib/libopenblasp-r0.3.10.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] plyr_1.8.6           plotly_4.9.4.1       forcats_0.5.0        stringr_1.4.0        dplyr_1.0.7          purrr_0.3.4          readr_1.4.0          tidyr_1.1.3          tibble_3.1.2         ggplot2_3.3.5        tidyverse_1.3.0      harmony_1.0          Rcpp_1.0.6           Signac_1.2.1.9003    SeuratWrappers_0.3.0 SeuratObject_4.0.2   Seurat_4.0.3         BiocStyle_2.16.1    
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1           backports_1.2.0        fastmatch_1.1-0        igraph_1.2.6           lazyeval_0.2.2         splines_4.0.3          crosstalk_1.1.1        BiocParallel_1.22.0    listenv_0.8.0          SnowballC_0.7.0        scattermore_0.7        GenomeInfoDb_1.24.0    digest_0.6.27          htmltools_0.5.1.1      fansi_0.5.0            magrittr_2.0.1         tensor_1.5             cluster_2.1.0          ROCR_1.0-11            remotes_2.2.0          globals_0.14.0         Biostrings_2.56.0      modelr_0.1.8           matrixStats_0.59.0     docopt_0.7.1           spatstat.sparse_2.0-0  colorspace_2.0-2       rvest_0.3.6            blob_1.2.1             ggrepel_0.9.1          haven_2.3.1            xfun_0.18              sparsesvd_0.2          crayon_1.4.1           RCurl_1.98-1.2         jsonlite_1.7.2         spatstat.data_2.1-0    survival_3.2-7         zoo_1.8-9              glue_1.4.2             polyclip_1.10-0        gtable_0.3.0           zlibbioc_1.34.0        XVector_0.28.0         leiden_0.3.8           future.apply_1.7.0     BiocGenerics_0.36.1    abind_1.4-5            scales_1.1.1           DBI_1.1.0              miniUI_0.1.1.1        
##  [52] viridisLite_0.4.0      xtable_1.8-4           reticulate_1.20        spatstat.core_2.2-0    rsvd_1.0.3             stats4_4.0.3           htmlwidgets_1.5.3      httr_1.4.2             RColorBrewer_1.1-2     ellipsis_0.3.2         ica_1.0-2              pkgconfig_2.0.3        farver_2.1.0           dbplyr_1.4.4           ggseqlogo_0.1          uwot_0.1.10.9000       deldir_0.2-10          here_1.0.1             utf8_1.2.1             labeling_0.4.2         tidyselect_1.1.1       rlang_0.4.11           reshape2_1.4.4         later_1.2.0            cellranger_1.1.0       munsell_0.5.0          tools_4.0.3            cli_3.0.0              generics_0.1.0         broom_0.7.2            ggridges_0.5.3         evaluate_0.14          fastmap_1.1.0          yaml_2.2.1             goftest_1.2-2          fs_1.5.0               knitr_1.30             fitdistrplus_1.1-5     RANN_2.6.1             pbapply_1.4-3          future_1.21.0          nlme_3.1-150           mime_0.11              slam_0.1-48            RcppRoll_0.3.0         xml2_1.3.2             rstudioapi_0.12        compiler_4.0.3         png_0.1-7              spatstat.utils_2.2-0   reprex_0.3.0          
## [103] tweenr_1.0.2           stringi_1.6.2          RSpectra_0.16-0        lattice_0.20-41        Matrix_1.3-4           vctrs_0.3.8            pillar_1.6.1           lifecycle_1.0.0        BiocManager_1.30.10    spatstat.geom_2.2-0    lmtest_0.9-38          RcppAnnoy_0.0.18       data.table_1.14.0      cowplot_1.1.1          bitops_1.0-7           irlba_2.3.3            httpuv_1.6.1           patchwork_1.1.1        GenomicRanges_1.40.0   R6_2.5.0               bookdown_0.21          promises_1.2.0.1       KernSmooth_2.23-17     gridExtra_2.3          lsa_0.73.2             IRanges_2.22.1         parallelly_1.26.1      codetools_0.2-17       MASS_7.3-53            assertthat_0.2.1       rprojroot_2.0.2        withr_2.4.2            qlcMatrix_0.9.7        sctransform_0.3.2      Rsamtools_2.4.0        S4Vectors_0.26.0       GenomeInfoDbData_1.2.3 hms_0.5.3              mgcv_1.8-33            parallel_4.0.3         grid_4.0.3             rpart_4.1-15           rmarkdown_2.5          Rtsne_0.15             ggforce_0.3.3          lubridate_1.7.9        shiny_1.6.0